knitr::opts_chunk$set(
cache = TRUE,
autodep = TRUE,
cache.lazy = FALSE,
cache.comments = FALSE
)
options(scipen = 999)
cbpalette <- multi.utils::cbpalette()
library(tidyverse)
rstan::rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
source("functions.R")
Create true values dataframe:
true_values <- data.frame(
parameter = c(
"beta_prev",
"log_sigma_phi_prev",
"beta_anc",
"log_sigma_b_anc",
"beta_art",
"log_sigma_phi_art"
),
true_value = c(
-2.4,
log(sqrt(1 / 4)),
-0.2,
log(sqrt(1/ 100)),
0.7,
log(0.35)
)
)
Read in results created in reports:
# model0 <- readRDS("depends/results_model0.rds")
model1 <- readRDS("depends/results_model1.rds")
model2 <- readRDS("depends/results_model2.rds")
model3 <- readRDS("depends/results_model3.rds")
model4 <- readRDS("depends/results_model4.rds")
# draw_boxplots(model0) + labs(title = "Model 0")
# draw_boxplots(model1) + labs(title = "Model 1")
# draw_boxplots(model2) + labs(title = "Model 2")
# draw_boxplots(model3) + labs(title = "Model 3")
# draw_boxplots(model4) + labs(title = "Model 4")
# draw_scatterplots(model0) + labs(title = "Model 0")
draw_scatterplots(model1) + labs(title = "Model 1")
## Warning: Removed 300 rows containing missing values (geom_point).
draw_scatterplots(model2) + labs(title = "Model 2")
## Warning: Removed 720 rows containing missing values (geom_point).
draw_scatterplots(model3) + labs(title = "Model 3")
## Warning: Removed 360 rows containing missing values (geom_point).
draw_scatterplots(model4) + labs(title = "Model 4")
## Warning: Removed 1080 rows containing missing values (geom_point).
Take samples from all distributions, then compute maximum ECDF
difference \(D\) (two-sample
Kolmogorov–Smirnov test). We assume that tmbstan represents
the gold-standard, and compare performance of aghq and
TMB via their similarity to this gold-standard. Higher
values of \(D\) correspond to
distributions which are more different.
# draw_ksplots_D(model0) + labs(title = "Model 0")
draw_ksplots_D(model1) + labs(title = "Model 1")
## Warning: Removed 14 rows containing missing values (geom_point).
draw_ksplots_D(model2) + labs(title = "Model 2")
## Warning: Removed 83 rows containing missing values (geom_point).
draw_ksplots_D(model3) + labs(title = "Model 3")
## Warning: Removed 27 rows containing missing values (geom_point).
draw_ksplots_D(model4) + labs(title = "Model 4")
## Warning: Removed 96 rows containing missing values (geom_point).
We could also assess \(l\), the location of \(D\). Determining if there are patterns in the location of the greatest ECDF difference could present us with useful insights.
# draw_ksplots_D(model0) + labs(title = "Model 0")
draw_ksplots_l(model1) + labs(title = "Model 1")
draw_ksplots_l(model2) + labs(title = "Model 2")
draw_ksplots_l(model3) + labs(title = "Model 3")
draw_ksplots_l(model4) + labs(title = "Model 4")
When using Markov chain Monte Carlo (MCMC) methods, as we have for
tmbstan, it’s important to assess for convergence.
One way to do this is via traceplots which visualise the chains over the number of iterations specified.
# map(model0, "mcmc_traceplots")
map(model1, "mcmc_traceplots")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
map(model2, "mcmc_traceplots")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
map(model3, "mcmc_traceplots")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
map(model4, "mcmc_traceplots")
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
The \(\hat R\) statistic (“R hat”) can also be used. A value of \(\hat R < 1.1\) is typically sufficient.
# draw_rhatplot(model0) + labs(title = "Model 0")
draw_rhatplot(model1) + labs(title = "Model 1")
draw_rhatplot(model2) + labs(title = "Model 2")
draw_rhatplot(model3) + labs(title = "Model 3")
draw_rhatplot(model4) + labs(title = "Model 4")